* --- Session setup ------------------------------------------------------------
* Run every command below under Stata 18.0 syntax and behavior.
version 18.0
* Fix the number of processors so results replicate across machines with
* different numbers of cores.
set processors 8
* Clean slate: clear existing data, then drop all macros.
clear all
macro drop _all
* Fix the random-number seed.
set seed 20220909
* Graph scheme used for every plot drawn below.
set scheme cleanplots
/* 
Notes: 
1) "set sortseed #" must be placed immediately before EACH "telasso" call (rather than once at the top of the do-file) so that the same number of selected controls is reported.
2) For a detailed explanation, see the email chain with Stata technical support dated 10/10/2023. 
*/

* --- Program metadata and log file --------------------------------------------
* The tag built here is displayed at the top of the log so that every log
* records the do-file name, its author, its creation date and the last run date.
local pgm  "dst-Figure_9_other_outcomes_telasso"        // file name
local who  "Muzhe Yang"                                 // author
local dte  "2025-01-21"                                 // created date
local dte2 "`c(current_date)'"                          // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

* Close any log left open by an earlier (possibly aborted) run, then open a
* fresh plain-text log named after this do-file.
* Forward slashes are used as directory separators: Stata accepts them on
* every operating system, and they avoid the problem that a backslash placed
* directly before a macro reference suppresses the macro expansion.
capture log close
log using "code/analysis/figures/`pgm'.txt", text replace
display "`tag'"

**# data prep ------------------------------------------------------------------------------------

* Load the cleaned estimation data set (path is relative to the working
* directory). A forward slash is used as the directory separator so the path
* resolves on every operating system, not only on Windows.
use "data_clean/dst-data04_for_estimation_within_250_miles", clear

* Quick look at the distance variable and the survey waves (including missing).
summarize dist_to_border
tabulate wave, missing

* Drop variables that are not referenced anywhere below in this do-file.
drop depression teeth_lost dental mammo_use cervical_screen colon_screen core_prev_m core_prev_f hlth_fair_or_poor

* Inspect the five outcomes analysed below and the tract identifier.
describe cancer diabetes heart_disease obesity stroke
codebook TractFIPS

*## (1) create the 9 cells, following page 215 of the following paper:
*## Giuntella, O. and F. Mazzonna (2019). "Sunset Time and the Economic Effects of Social Jetlag: Evidence from US Time Zone Borders." Journal of Health Economics 65: 210-226.

* Cell numbering: each border region owns three consecutive cell numbers
* (eastern/central = 1-3, central/mountain = 4-6, mountain/pacific = 7-9).
* Within a region the cells run from north to south by centroid latitude:
*   +1: latitude > 40,   +2: 34 < latitude <= 40,   +3: latitude <= 34.
* Observations whose region matches none of the three strings keep cell missing.
assert !missing(centroid_lat)
assert !missing(region)
generate cell = .
local offset 0
foreach border_region in "eastern and central" "central and mountain" "mountain and pacific" {
    replace cell = `offset' + 1 if centroid_lat > 40 & region == "`border_region'"
    replace cell = `offset' + 2 if centroid_lat > 34 & centroid_lat <= 40 & region == "`border_region'"
    replace cell = `offset' + 3 if centroid_lat <= 34 & region == "`border_region'"
    local offset = `offset' + 3
}

* Cross-tabulate cells against time zones, then check the latitude range of
* each latitude band (north, middle, south) pooled over the three regions.
tabulate cell time_zone, missing
summarize centroid_lat if inlist(cell, 1, 4, 7)
summarize centroid_lat if inlist(cell, 2, 5, 8)
summarize centroid_lat if inlist(cell, 3, 6, 9)

*## (2) control variables

* x: the tract-level covariates.
global x "pop white_pct black_pct hispanic_pct pop_under_18yr_pct pop_65yr_over_pct age_median educ_hs_pct educ_coll_pct married_pct hh_size hh_income_median home_value_median no_health_ins_pct unemploy_pct"

* Complete list of continuous covariates: x plus dist_to_border, centroid_lat
* and daylight_dur_max. Kept in a local so the list is written only once here.
local covars_all "$x dist_to_border centroid_lat daylight_dur_max"

* cvars: every continuous covariate fully interacted with every continuous
* covariate (main effects, squares and pairwise products).
global cvars c.(`covars_all')##c.(`covars_all')

* controls: cvars plus cell and wave indicators, each of which is also
* interacted with every continuous covariate.
global controls $cvars i.cell i.cell#c.(`covars_all') i.wave i.wave#c.(`covars_all')

describe  `covars_all'
summarize `covars_all'

**# estimation prep -------------------------------------------------

* Numeric version of the county identifier; it is the cluster variable in the
* vce(cluster ...) option of the telasso calls below.
encode CountyFIPS, generate(county_fips)

* nomiss = number of covariates that are missing for the observation
* (0 means the observation is complete on all covariates).
egen nomiss = rowmiss($x dist_to_border centroid_lat daylight_dur_max)

* Estimation sample: complete covariates, not in Arizona, and dist_to_border
* no larger than the chosen radius.
* NOTE(review): the radius filter presumably relies on dist_to_border being a
* non-negative (unsigned) distance -- confirm against the data-cleaning code.
assert !missing(StateAbbr)
local radius 50
generate sample_selection = (nomiss == 0 & StateAbbr != "AZ" & dist_to_border <= `radius')

* Options appended to every telasso call below.
global xfolds_resample "xfolds(10) resample(3) nolog"

*## create a composite health index based on the estimation sample, following page 215 of the following paper:
*## Giuntella, O. and F. Mazzonna (2019). "Sunset Time and the Economic Effects of Social Jetlag: Evidence from US Time Zone Borders." Journal of Health Economics 65: 210-226.

* Standardize each outcome within the estimation sample, collecting the names
* of the resulting z-score variables as the loop runs.
local z_scores
foreach outcome in cancer diabetes heart_disease obesity stroke {
    egen double `outcome'_z = std(`outcome') if sample_selection == 1
    summarize `outcome'_z if sample_selection == 1
    local z_scores `z_scores' `outcome'_z
}

* index = row mean of the five z-scores, defined only in the estimation sample.
egen double index = rowmean(`z_scores') if sample_selection == 1
summarize index if sample_selection == 1

**# figure ----------------------------------------------------------------------------------------------

* Three estimation samples, keyed by the suffix used in the stored-estimate
* names. Each local holds the condition appended to "if sample_selection == 1":
*   full        : no extra restriction
*   et_ct_north : eastern/central region, cell 1
*   et_ct_south : eastern/central region, cells 2 and 3
local if_full        ""
local if_et_ct_north `"& region == "eastern and central" & cell == 1"'
local if_et_ct_south `"& region == "eastern and central" & (cell == 2 | cell == 3)"'

* Estimate every outcome on every sample and store the results as
* <outcome>_<sample>. The sort seed is reset immediately before EACH telasso
* call (not once at the top of the do-file) so that the same number of
* selected controls is reported. The estimation sample size of every model is
* recorded so that it can be checked against the legend below.
foreach y in cancer diabetes heart_disease obesity stroke index {
    foreach s in full et_ct_north et_ct_south {
        set sortseed 12102023
        telasso (`y' $controls) (treat $controls) if sample_selection == 1 `if_`s'', vce(cluster county_fips) $xfolds_resample rseed(10101)
        estimates store `y'_`s'
        local n_`y'_`s' = e(N)
    }
}

* The figure legend reports ONE sample size per sample, taken from the cancer
* models. Warn (without stopping) if any other outcome was estimated on a
* different number of observations, because the legend would then be wrong
* for that panel.
foreach s in full et_ct_north et_ct_south {
    estimates restore cancer_`s'
    local n_obs_`s' = e(N)
    foreach y in diabetes heart_disease obesity stroke index {
        if `n_`y'_`s'' != `n_obs_`s'' {
            display as error "warning: N for `y'_`s' (`n_`y'_`s'') differs from N for cancer_`s' (`n_obs_`s''); the legend reports the cancer sample size"
        }
    }
}

* Draw the figure from the stored telasso results.
* - One subgraph per outcome (cancer, diabetes, heart disease, obesity, stroke,
*   index); subgraphs are separated by "||" and titled through bylabel().
* - Each subgraph plots three stored estimates: the full sample, the northern
*   eastern/central cell, and the southern eastern/central cells.
* - Legend labels are defined once, in the first subgraph. The sample sizes in
*   those labels come from the n_obs_* locals, which are read from the cancer
*   models.
* - Only the coefficient r1vs0.treat is kept; it is relabelled with a
*   multi-line description of the treatment indicator.
* - Point estimates are printed above the markers with three decimals, a
*   dashed gray reference line marks zero, and both 95% and 90% confidence
*   intervals are drawn.
coefplot (cancer_full, label("All time zones in the contiguous United States; sample size = `n_obs_full'")) ///
         (cancer_et_ct_north, label("Northern part of the eastern and central time zones, defined by latitude (in degrees) of the centroid of a census tract > 40; sample size = `n_obs_et_ct_north'")) ///
         (cancer_et_ct_south, label("Southern part of the eastern and central time zones, defined by latitude (in degrees) of the centroid of a census tract ≤ 40; sample size = `n_obs_et_ct_south'")), bylabel("Cancer") || ///
         (diabetes_full) ///
         (diabetes_et_ct_north) ///
         (diabetes_et_ct_south), bylabel("Diabetes") || ///
         (heart_disease_full) ///
         (heart_disease_et_ct_north) ///
         (heart_disease_et_ct_south), bylabel("Heart disease") || ///
         (obesity_full) ///
         (obesity_et_ct_north) ///
         (obesity_et_ct_south), bylabel("Obesity") || ///
         (stroke_full) ///
         (stroke_et_ct_north) ///
         (stroke_et_ct_south), bylabel("Stroke") || ///
         (index_full) ///
         (index_et_ct_north) ///
         (index_et_ct_south), bylabel("Index") || ///
         , keep(r1vs0.treat) ///
         coeflabels(r1vs0.treat = `""Treat (1/0):" "1 for census tracts" "located east of" "the time zone border;" "0 for census tracts" "located west of" "the time zone border""', labsize(*0.8)) ///
         mlabels mlabsize(*0.8) mlabcolor(black) mlabformat(%9.3f) mlabposition(12) mlabgap(*2) ///
         xline(0, lpattern(dash) lwidth(*1) lcolor(gray)) ///
         byopts(xrescale noiytick iscale(*0.9)) ///
         levels(95 90) ciopts(lwidth(*3 *3) lcolor(*.3 *.6)) ///
         legend(size(*0.75) position(6))

* Save the figure in Stata's own graph format and export it as PNG.
* Forward slashes are used as directory separators: Stata accepts them on
* every operating system, and they avoid the problem that a backslash placed
* directly before a macro reference suppresses the macro expansion.
graph save "code/analysis/figures/`pgm'", replace
graph export "code/analysis/figures/`pgm'.png", replace

* Enhanced Metafile export is supported only by Stata for Windows. On other
* systems the command would stop the do-file before the log is closed, so it
* is skipped there with a note in the log.
if "`c(os)'" == "Windows" {
    graph export "code/analysis/figures/`pgm'.emf", replace
}
else {
    display as text "note: .emf export skipped because this is not Stata for Windows"
}

log close
exit